Data

The data contains the following variables on all medal winners in all winter Olympics from 1924 to 2014:

  • Year: year of the winter Olympics
  • City: city the Olympics is held
  • Sport: the type of sport
  • Discipline: a grouping of disciplines
  • Event: the particular event / competition
  • Athlete: name of the athlete
  • Country: country origin of the athlete
  • Gender: gender of the athlete
  • Medal: type of medal won

An event is a competition in a sport or discipline that gives rise to a ranking. Thus, skiing is a sport, while cross-country skiing, Alpine skiing, snowboarding, ski jumping and Nordic combined are disciplines. Alpine skiing is a discipline, while the super-G, giant slalom, slalom and combined are events.

In addition, information about the countries is available in a separate dataset including the IOC Country Code, Population, and GDP per capita. All visualizations below use one or more of these datasets.

Visualizations

# Read in data 
olymp <- read.csv("../data/winter.csv")
gdp <- read.csv("../data/dictionary.csv")

olymp$Year <- as.Date(strptime(olymp$Year, format = "%Y"))
colnames(gdp) <- c("Country_Full", "Country", "Population", "GDP.per.Capita")

1. Medal Counts over Time

Because some countries that competed under different designations in the past (e.g. Germany and Russia), I chose to combine Russia with URS, Germany with EUA, FRG and GDR, YUG with Bosnia and Czech Republic with TCH. I felt these designations were fair and didn’t overweight countries unfairly.

For convenience, below is a summary of the number of winter Olympics games each country medaled in.

Summary of Olympics Games Medaled In

# How many winter game Medal types per country
# Combine RUS and URS, and GER, EUA, FRG, GDR
olymp$Country <- as.character(olymp$Country)
olymp$Country <- ifelse(olymp$Country == "URS", "RUS", olymp$Country) # Russia
olymp$Country <- ifelse(olymp$Country == "EUA", "GER", olymp$Country) # Germany
olymp$Country <- ifelse(olymp$Country == "FRG", "GER", olymp$Country)
olymp$Country <- ifelse(olymp$Country == "GDR", "GER", olymp$Country)
olymp$Country <- ifelse(olymp$Country == "TCH", "CZE", olymp$Country) # Czech
olymp$Country <- ifelse(olymp$Country == "YUG", "BIH", olymp$Country) # For purposes of BIH hosting Olympics games; only Croatia & Slovenia have medals
olymp$Country <- as.factor(olymp$Country)

descriptive1 <- olymp %>% 
  mutate(Medal = factor(Medal, levels = c("Gold", "Silver", "Bronze"))) %>%
  group_by(Country, Medal) %>% 
  mutate(medal_yr_cnt = length(Medal)) %>%
  select(Country, Medal, medal_yr_cnt) %>%
  unique() %>% 
  spread(key = Medal, value = medal_yr_cnt) %>% 
  ungroup() %>% 
  select(Country, Gold, Silver, Bronze)
descriptive1[is.na(descriptive1)] <- 0

# How many years in Olympics
descriptive2 <- olymp %>% 
  select(Country, Year) %>% 
  group_by(Country) %>% 
  unique() %>% 
  summarize(num_olymp = n())

# Combination -- Number of Olympics and Medals per Country
descriptive <- inner_join(descriptive1, descriptive2, by = "Country")
descriptive$total_medals <- rowSums(descriptive[, c(2,3,4)])

# Joining GDP information for later
descriptive_gdp <- inner_join(descriptive, gdp, by = "Country")

# How many games participated 
library(ggalt)
descriptive %>% 
  filter(num_olymp > 5) %>% 
  ggplot(aes(x = reorder(Country, num_olymp), y = num_olymp)) +
  geom_lollipop(point.colour = "lightblue", point.size = 2) +
  labs(x = "", y = "", 
       title = "Number of Winter Olympics Medaled In", 
       subtitle = "Winter Olympics 1924 - 2014",
       caption = "*Countries that medaled in more than five Winter Olympics") +
  coord_flip() +
  theme_minimal()

descriptive %>% 
  filter(num_olymp <= 5) %>% 
  ggplot(aes(x = reorder(Country, num_olymp), y = num_olymp)) +
  geom_lollipop(point.colour = "lightblue", point.size = 2) +
  labs(x = "", y = "", 
       title = "", 
       subtitle = "",
       caption = "*Countries that medaled in fewer than five Winter Olympics") +
  coord_flip(ylim = c(0, 25)) +
  theme_minimal()

Many of the following analyses will use data from the top 10 medal-winning countries, created by ranking countries by the number of medals won in total over all years.

#### Additional Data Wrangling
# Who are the top 10 medal producers over time?
top10 <- descriptive %>% 
  arrange(desc(total_medals)) %>% 
  mutate(rank = row_number()) %>% 
  filter(rank <= 10)
top10
## # A tibble: 10 x 7
##    Country  Gold Silver Bronze num_olymp total_medals  rank
##    <fct>   <dbl>  <dbl>  <dbl>     <int>        <dbl> <int>
##  1 RUS     344    187      172        15          703     1
##  2 USA     167    319      167        22          653     2
##  3 GER     226    208      203        20          637     3
##  4 CAN     315    203      107        22          625     4
##  5 NOR     159    171      127        22          457     5
##  6 FIN      66.0  147      221        22          434     6
##  7 SWE     127    129      177        22          433     7
##  8 SUI      76.0   77.0    132        21          285     8
##  9 AUT      79.0   98.0    103        22          280     9
## 10 CZE      30.0   92.0    111        16          233    10
# Calculate how many medals per year 
medalsyear <- olymp %>% 
  group_by(Year, Country) %>% 
  filter(Country %in% top10$Country) %>% 
  summarize(medal_yr_cnt = length(Medal)) 

# Split by gold, silver, bronze medals, per year
medals_year <- olymp %>% 
  mutate(Medal = factor(Medal, levels = c("Gold", "Silver", "Bronze"))) %>%
  group_by(Year, Country, Medal) %>% 
  summarize(medal_yr_cnt = length(Medal)) 

# filter for only countries that have at least 100 medals
medals100 <- olymp %>% 
  group_by(Country) %>% 
  summarize(total_medals = length(Medal)) %>% 
  filter(total_medals >= 100)
Olympic Medals Over Time

To visualize how many medals the top 10 medal-winning countries won over the 1920-2014 Winter Olympics, I created two wrapped plots. Both use the colors gold, silver and bronze to display how many of each medal type was won over time. I would recommend the second of these plots to my editor, as I believe the colors and trends are clearer in an area plot than in a scatter plot. Something that immediately jumps out is that while Canada and USA seem to have won similarly large amounts of medals over time, the US has won many more silver and gold, while Canada seems to focus on gold.

# Split by Country, over time 
#png("../fig/medcountry_time.png")
medcountry <- medals_year %>%  
  filter(Country %in% top10$Country) %>% 
  ggplot(aes(x = Year, y = medal_yr_cnt, na.rm = TRUE)) +
  geom_jitter(aes(color = Medal)) +
  scale_color_manual(values=c("gold","grey73","darkgoldenrod4")) +
  labs(x = "", y = "# Medals Won",
       title = "Olympic Medals by Country",
       subtitle = "Winter Olympics 1924 - 2014") +
  facet_wrap(~Country) +
  theme_light() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  theme(strip.background = element_rect(fill = "lightskyblue2")) 
medcountry

#The winning "over time" plot
medals_year %>%
  ungroup() %>% 
  filter(Country %in% top10$Country) %>% 
  ggplot(aes(x = Year, y = medal_yr_cnt, na.rm = TRUE)) +
  geom_area(aes( fill = Medal), alpha = 0.7) +
  scale_fill_manual(values=c("gold","grey73","darkgoldenrod4")) +
  labs(x = "", y = "# Medals Won",
       title = "Olympic Medals by Country",
       subtitle = "Winter Olympics 1924 - 2014") +
  facet_wrap(~Country) +
  theme_light() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  theme(strip.background = element_rect(fill = "lightskyblue2"))

Total Medal Visualizations

To visualize the total amount of medals won by the top 10 medal-winning countries, I produced a lollipop chart for a quick glance, then a bar chart that includes countries that have won at least 100 medals over time with the same gold, silver and bronze coloring to add more information and to make the stacks immediately obvious to the reader. One hundred medals felt like a natural cut-off point between the highest perorming countries and the mid-performing countries. It turned out there are only 13 countries that have won at least 100 medals over time, so the second chart isn’t much longer than the first. I propose that the editor uses the more colorful chart since it conveys more information (how many of each type of medal won) and is likely more intriguing to the reader.

top10 %>% 
  ggplot(aes(x = reorder(Country, total_medals), y = total_medals)) +
  geom_lollipop(point.colour = "lightblue", point.size = 2) +
  labs(x = "", y = "", 
       title = "Winter Olympics Medals Overall", 
       subtitle = "Winter Olympics 1924 - 2014: Number of Medals Won by Top 10 Medal-Winning Countries") +
  coord_flip() +
  theme_minimal()

# Viz for top medal producing countries, at least 100 medals 
totalmedals_plot <- olymp %>% 
  mutate(Medal = factor(Medal, levels = c("Gold", "Silver", "Bronze"))) %>%
  group_by(Country, Medal) %>% 
  filter(Country %in% medals100$Country) %>% 
  summarize(total_medals = length(Medal))

#png("../fig/totalmedals.png")
totalmedals <- totalmedals_plot %>% 
  ggplot(aes(x = reorder(Country, total_medals), y = total_medals, 
             fill = Medal, group = Medal)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = total_medals), position = position_stack(vjust = 0.5),
            size = 3, color = "white") +
  scale_fill_manual(values=c("gold","grey73","darkgoldenrod4")) +
  labs(x = "", y = "", 
       title = "Winter Olympic Medals Overall", 
       subtitle = "Winter Olympics 1924 - 2014: Countries with Over 100 Medals") +
  theme_light()
totalmedals

2. Medal Counts adjusted by Population, GDP

I created three separate rankings of success per country by GDP per capita, population, and total number of medals won. I then chose to visualize this data in a few different ways. First, I visualized the percentage of medals won divided by the population of each country to provide a calculation of “medal per person in entire population.” Countries are ordered by GDP/Capita ranking, thus, countries with high GDP/Capita and a low percentage of medals/population are highlighted in yellow. LUX, for instance, has the highest GDP/Capita out of all the countries yet a very low percentage of medals/population. SUI and NOR’s success makes sense, given they have high GDP/Capita and high percentage of medals/population. The USA, however, could do better!

# Data prep
by_pop_gdp <- descriptive_gdp %>% 
  arrange(desc(total_medals)) %>% 
  mutate(medal_rank = row_number()) %>% 
  arrange(desc(GDP.per.Capita), desc(total_medals)) %>% 
  mutate(gdp_medal_rank = row_number()) %>% 
  arrange(desc(Population), desc(total_medals)) %>% 
  mutate(pop_medal_rank = row_number()) %>% 
  filter(medal_rank <10 | gdp_medal_rank <10 |pop_medal_rank <10)


# GDP Viz
# by_pop_gdp %>% 
#   ggplot(aes(x = reorder(Country, gdp_medal_rank), y = total_medals)) +
#   geom_bar(aes(fill = total_medals < 20), stat = "identity") +
#   scale_fill_manual(values=c("grey","gold")) +
#   labs(x = "Country in Order of GDP/Capita Ranking", y = "Medals Won Overall", 
#        title = "Some Countries Should Be Doing Better", 
#        subtitle = "Winter Olympic Medals by Countries in Order of Highest GDP/Capita") +
#   theme_light() +
#   theme(legend.position = "none")

# GDP Viz
by_pop_gdp %>% 
  ggplot(aes(x = reorder(Country, gdp_medal_rank), y = (total_medals/Population * 100))) +
  geom_bar(aes(fill = (total_medals/Population* 100) < 0.0025), stat = "identity", alpha = 0.9) +
  scale_fill_manual(values=c("grey","gold")) +
  labs(x = "Country in Order of GDP/Capita Ranking", y = "% of Medal Winners/Pop", 
       title = "Some Countries Should Be Doing Better", 
       subtitle = "Winter Olympic Medals/Population of Countries") +
  theme_light() +
  theme(legend.position = "none") 

The next visualization shows the countries lined up by population size and compares how many medals they’ve won in total. Countries with the fewest medals are “shamed” by being highlighted in yellow. My argument for showing the data in this matter is that countries with larger populations have more opportunity for top-performing athletes. China and Japan are both countries with particularly large populations that don’t tend to win as many medals as other countries with large populations.

# Pop Viz 
by_pop_gdp %>% 
  ggplot(aes(x = reorder(Country, pop_medal_rank), y = total_medals)) +
  geom_bar(aes(fill = total_medals < Population/500000), stat = "identity", alpha = 0.9) +
  scale_fill_manual(values=c("grey","gold")) +
  labs(x = "Country in Order of Largest Population", y = "Medals Won Overall", 
       title = "Some Countries Should Be Doing Better", 
       subtitle = "Countries that have enough people to win more medals") +
  theme_light() +
  theme(legend.position = "none")

I then adjusted the visualizations to be bubble charts that may be slightly more aesthetically pleasing. I calculated the true GDP as GDP per Capita multiplied by the population to make the axes arguably more comparable. The following viz points out China, specifically, as a country with both a lot of people and a lot of money, but we can see from the size of its bubble that China is not a very big medal producer. I don’t love shaming individual countries, so I’d suggest the editor uses the final visualization in publication.

by_pop_gdp %>% 
  ggplot(aes(x = log(Population), y = log(GDP.per.Capita * Population))) +
  geom_count(aes(color = Country, size = total_medals, fill = Country, alpha = 0.7), show.legend = FALSE) +
  scale_size_area(max_size = 20) +
  geom_text(aes(label = Country), check_overlap = TRUE, size = 3) +
  theme_light() +
  theme(axis.text = element_blank()) +
  theme(axis.ticks = element_blank()) +
  labs(x = expression(atop("Population", paste(symbol('\256')))), 
       y = expression(atop("GDP",paste(symbol('\256')))), 
       title = "China Should Be Doing Better", 
       subtitle = "Winter Olympics 1924 - 2014: Medals per Pop & GDP of Country",
       caption = "Bigger Bubbles = More Medals Won") +
  theme(axis.title.y = element_text(size = 12)) + 
  theme(axis.title.x = element_text(size = 12)) 

Lastly, I visualized the percentage of medals won by the country’s population, again, and showed how many total medals countries won overall via the size of each bubble. From this viz, it’s clear that Norway and Finland do well for themselves given the large amount of medals they’ve won despite their smaller populations.

#png("../fig/norwaymedals.png")
norway <- by_pop_gdp %>% 
  ggplot(aes(x = log(Population), y = total_medals/Population *100)) +
  geom_count(aes(color = Country, size = total_medals, 
                 fill = Country, alpha = 0.7), show.legend = FALSE) +
  #scale_fill_manual(values=c("purple","yellow")) +
  scale_size_area(max_size = 20) +
  geom_text(aes(label = Country), check_overlap = TRUE, size = 3) +
  theme_classic() +
  theme(axis.text = element_blank()) +
  theme(axis.ticks = element_blank()) +
  labs(x = expression(atop("Population", paste(symbol('\256')))), 
       y = expression(atop("% of Medals per Pop",paste(symbol('\256')))), 
       title = "Norway & Finland Citizens Dominate", 
       subtitle = "Winter Olympics 1924 - 2014: Percentage of Medals Won per Population",
       caption = "Bigger Bubbles = More Medals Won Overall") +
  theme(axis.title.y = element_text(size = 12)) + 
  theme(axis.title.x = element_text(size = 12)) 
  #geom_encircle(data = subset(by_pop_gdp, (total_medals/Population *100 > 0.0040)))
norway

3. Host Country Advantage

I calculated whether countries won more medals when they hosted Winter Olympics as opposed to when they were visiting participants. To do so, I downloaded necessary country host information from Wikipedia. I manipulated the dataset so that I could measure how many medals a country won per year, but also could highlight the year(s) that they hosted the Olympics. I chose to only use countries that have hosted the Olympics in my plots and calculations for a more intuitive comparison.

library(rvest)
library(stringr)
wiki_hosts <- read_html("https://en.wikipedia.org/wiki/Winter_Olympic_Games")
hosts <- html_table(html_nodes(wiki_hosts, "table")[[5]], fill=TRUE)
hosts <- hosts[-1,1:3]
hosts$city <- str_split_fixed(hosts$Host, n=2, ",")[,1]
hosts$country <- str_split_fixed(hosts$Host, n=2, ",")[,2]
hosts$Year <- as.Date(strptime(hosts$Year, format = "%Y"))
hosts$Year <- format(hosts$Year, '%Y')
medals_year$Year <- format(medals_year$Year, '%Y') # weird issues with year 

hosts <- hosts %>% 
  select(Year, country)
colnames(hosts)[colnames(hosts) == "country"] <- "Host_Country"

# Join tables for Full Country Name, Year, number of medals won and whether they hosted
hosts <- merge(medals_year, hosts, on = "Year")
hosts <- merge(hosts, descriptive_gdp, on = "Country")
hosts$Host_Country <- as.character(hosts$Host_Country)
hosts$Country_Full <- as.character(hosts$Country_Full)
hosts$Host_Country <- trimws(hosts$Host_Country)
hosts <- hosts %>% 
  select(Year, Country, Country_Full, Medal, medal_yr_cnt, Host_Country) %>% 
  mutate(hosted = ifelse(Country_Full == Host_Country, 1, 0)) %>% 
  spread(key = Medal, value = medal_yr_cnt)
hosts[is.na(hosts)] <- 0 
hosts$total_medals <- rowSums(hosts[, c(6,7,8)])

I began visualizing the data with a wrapped bar chart for each country that has ever hosted in the Olympics. I chose stark colors (grey and blue) so the reader can easily see a country’s total number of medals won when they were hosts versus when they were visiting participants. It’s clear from the visualization that most countries do exceedingly better, if not, at least as good as they’ve ever done, when they host.

#png("../fig/hostplot1.png")
hosts %>% 
  filter(Country_Full %in% hosts$Host_Country) %>% 
  ggplot(aes(x = Year, y = total_medals)) +
  geom_histogram(aes(fill = as.factor(hosted)), stat = "identity") +
  facet_wrap(~Country_Full, nrow = 4) +
  theme_minimal() +
  theme(axis.text = element_blank()) +
  theme(axis.ticks = element_blank()) +
  theme(strip.background = element_rect(fill = "lightskyblue2")) +
  scale_fill_manual(values=c("gray87","lightseagreen"), labels = c("Medals; Not Host", "Host")) +
  labs(x = expression(atop("1924 - 2014", paste(symbol('\256')))),
       y = "", 
       title = "Home Court Advantage", 
       subtitle = "Host Countries of the Winter Olympics 1924 - 2014",
       fill = "Medals Won Per Year") +
  theme(text = element_text(size = 11))

A downside of the next plot I created is that it’s slightly harder to compare the exact amount of medals won each year, but I find it particularly aesthetically pleasing. This plot, again, includes only countries that have hosted an Olympics and shows how many medals each country won at each winter Olympics. Tiles outlined in black indicate that the country hosted that year.

#png("../fig/hostplot2.png")
palette <- colorRampPalette(brewer.pal(9, 'GnBu'), space='Lab', bias = 5)
hosts$hosted <- as.factor(hosts$hosted)
home.court <- hosts %>% 
  filter(Country_Full %in% hosts$Host_Country) %>% 
  ggplot(aes(x = Year, y = Country)) +
  geom_raster(aes(fill = total_medals)) +
  geom_tile(aes(color = hosted), fill = "#00000000", size = 1, show.legend = FALSE) +
  theme_light() +
  scale_fill_gradientn(colors = palette(9)) +
  scale_color_manual(values = c('#00000000', 'black')) +
  labs(x="", y="", fill="Medals", 
       title = "Home Court Advantage", 
       subtitle = "Host Countries of the Winter Olympics 1924 - 2014", 
       caption = "Hosting country indicated by outline. ") +
  theme(
        legend.text=element_text(color="grey20"),
        axis.text.x=element_text(size=8),
        axis.ticks.y=element_blank(),
        axis.ticks.x = element_blank(),
        panel.grid=element_blank(),
        plot.margin = unit(c(.5,1,0.3,1), "cm")) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
  home.court

4. Country success by sport / discipline / event

First, I wanted to compare USA and Canadian Olympic performance especially because of their hockey rivalry. I adjusted the dataset to account for the fact that hockey is a large team sport and calculated the total number of medals as just one per win rather than per player. I then visualized the number of medals won per sport (i.e. discipline, see variable definitions in introduction) between the countries using a stacked bar chart. USA’s advantage over Canada is clear. To improve this plot, I’d suggest adding an interactive overlay to indicate the precise amount of medals per sport layer.

sports %>% 
  filter(Country == "USA" | Country == "CAN") %>% 
  group_by(Country, Sport) %>% 
  summarize(total_medals = sum(adj_medals)) %>% 
  ggplot(aes(x = Country, y = total_medals)) +
  geom_bar(aes(fill = Sport), stat = "identity", alpha = 0.9) + 
  scale_fill_brewer(palette = "Reds") +
  labs(x= "",
       y= " ",
       title = "U.S. vs. Canadian Olympic Medals", 
       subtitle = "Winter Olympics 1924 - 2014") +
  theme_light()

To best visualize the top 10 medal-winning countries, I’d suggest using the visualization below. It provides the reader with even more information but in a (hopefully) digestible way – the reader can tell how many of each medal type (gold, silver, bronze) each country won overall in each discipline. The top ten medal-winning countries happen to be best at Skiing and Skating sports as indicated by the darker blues. Germany, Norway and Russia seem to outperform the other countries in Biathlon, specifically, and Germany is talented in the Luge.

# Top 10 Medal Winning Countries Split by Sport
#png("../fig/sports.png")
palette <- colorRampPalette(brewer.pal(9, 'GnBu'), space='Lab', bias = 5)
sports <- sports %>% 
  filter(Country %in% top10$Country) %>% 
  ggplot(aes(x = Sport, y = Medal, fill = adj_medals)) + 
  geom_tile(color = "white") + 
  facet_grid(~Country) + 
  scale_fill_gradientn(colors = palette(9)) +
  theme_light() +
  coord_flip() +
  theme(strip.background = element_rect(fill = "lightskyblue2")) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  theme(panel.grid=element_blank()) +
  labs(x= "",
       y= "",
       title = "Top 10 Medal-Winning Countries by Sport", 
       subtitle="Winter Olympics 1924 - 2014", 
       fill="# of Medals")
sports

5. Most successful athletes

Below is a visual display of the most successful winter Olympic athletes of all time. I manipulated the data such that I could create a count of medals won per Olympic athlete. Some athletes had multiple country designations, so I tried to choose which country made the most sense for the individual and labeled them as being part of that country. I created a category for “multiple sports” if a top medal-winning athlete participated and won medals in multiple disciplines. Norway tends to produce the top medal-winning winter athletes, which is not surprising given Norway’s clear success in prior plots.

# created the separate male and female datasets when I made two separate visualizations for men and women, but in the end decided to facet wrap with one dataset. 
athletes <- olymp %>% 
  mutate(Medal = factor(Medal, levels = c("Gold", "Silver", "Bronze"))) %>%
  group_by(Athlete, Gender, Medal) %>% 
  select(Country, Athlete, Discipline, Sport) %>%
  mutate(total_medals_per = length(Medal)) %>%
  ungroup()  %>% 
  unique()
athletes <- athletes %>% 
  group_by(Athlete, Gender, Country) %>% 
  mutate(totalmedals = sum(total_medals_per)) %>% 
  select(Athlete, Gender, Country, Discipline, Sport, totalmedals) %>% 
  unique() 
athletes$Discipline <- as.factor(athletes$Discipline)
athletes$Athlete <- as.factor(athletes$Athlete) 
athletes$totalmedals <- as.integer(athletes$totalmedals)

# top 20 overall
top_20 <- athletes %>% 
  select(Athlete, Gender, totalmedals, Sport, Discipline) %>% 
  unique() %>% 
  arrange(desc(totalmedals)) 
top_20 <- top_20[-10, ] # ridding of second country, looked up most represented country
top_20 <- top_20[1:20, ] 
top_20$Discipline <- as.character(top_20$Discipline)
top_20[3, 6] <- "Multiple Sports"
top_20[4, 6] <- "Multiple Sports"
top_20 <- unique(top_20)

# top 10 female
top_10_F <- athletes %>% 
  filter(Gender == "Women") %>% 
  arrange(desc(totalmedals))
top_10_F <- top_10_F[-5, ] 
top_10_F <- top_10_F[1:10, ]

# top 10 men
top_10_M <- athletes %>% 
  filter(Gender == "Men") %>% 
  arrange(desc(totalmedals))
top_10_M$Discipline <- as.character(top_10_M$Discipline)
top_10_M[3, 4] <- "Multiple Sports"
top_10_M[4, 4] <- "Multiple Sports"
top_10_M <- unique(top_10_M)
top_10_M <- top_10_M[1:10, ]

#png("../fig/athletes.png")
athletes <- top_20 %>% 
  ggplot(aes(reorder(x = Athlete, totalmedals), y = totalmedals)) +
  geom_bar(aes(fill = Country == "NOR"), alpha = 0.6, stat = "identity") +
  scale_fill_manual(values=c("grey","red"), labels = c("Other Country", "Norway")) +
  coord_flip() +
  theme_light() +
  labs(x= "",
       y= "Overall Medals",
       title = "Top 20 Medal-Winning Olympians: Norway Dominates
       ", 
       subtitle = "Winter Olympics 1924 - 2014") +
  theme(legend.title = element_blank())
athletes

I then split the athletes by gender. From the plots, we can see that women dominate in Cross Country Skiing while men dominate in the Biathlon. The top medal-winning men have won a few more medals than the top women.

top_20 %>% 
 ggplot(aes(reorder(x = Athlete, totalmedals), y = totalmedals)) +
  geom_bar(aes(fill = Discipline), alpha = 0.7, stat = "identity") +
  coord_flip(ylim = c(0, 15)) +
  theme_light() +
  theme(legend.position = "bottom") +
  labs(x= "",
       y= "Total Medals",
       title = "Top 10 Male & Female Medal-Winning Olympians", 
       subtitle = "Winter Olympics 1924 - 2014") +
  scale_fill_brewer(palette = "Dark2", name = "Sport") +
  theme(strip.background = element_rect(fill = "lightskyblue2")) +
  facet_wrap(~Gender, scales = "free")

#source("../lib/multiplot.R") 
# layout <- matrix(c(1, 1, 2, 2, 1, 1, 2, 2), 2, 4, byrow = TRUE)
# multiplot(top10F, top10M, layout = layout) 

Interactivity

6. Medals per Population

For my first interactive plot (created with plotly), I chose to add additional information to my bubble chart that represented the number of medals per person/population of each country. Prior to adding interactivity, the scale for total medals won was less clear. The below plot allows the user to hover over each bubble and get a clearer picture of how many overall medals the country won.

library(plotly)
# Had to rearrange the ggplot to get rid of arrow element 
colnames(by_pop_gdp)[colnames(by_pop_gdp) == "total_medals"] <- "Total_Medals"
colnames(by_pop_gdp)[colnames(by_pop_gdp) == "Country"] <- "Code"
colnames(by_pop_gdp)[colnames(by_pop_gdp) == "Country_Full"] <- "Country"
norway2 <- by_pop_gdp %>% 
  mutate(medspop = Total_Medals/Population * 100) %>% 
  ggplot(aes(x = log(Population), y = Total_Medals/Population *100)) +
  geom_count(aes(color = Code, size = Total_Medals, 
                 fill = Code, alpha = 0.7, label = Country), show.legend = FALSE) +
   labs(x = "Population", 
       y = "% of Medals per Pop", 
       title = "Norway & Finland Citizens Dominate", 
       subtitle = "Winter Olympics 1924 - 2014: Percentage of Medals Won per Population",
       caption = "Bigger Bubbles = More Medals Won Overall") +
  scale_size_area(max_size = 20) +
  geom_text(aes(label = Code), check_overlap = TRUE, size = 3) +
  theme_classic() +
  theme(axis.text = element_blank()) +
  theme(axis.ticks = element_blank()) +
  theme(axis.title.y = element_text(size = 12)) + 
  theme(axis.title.x = element_text(size = 12)) +
  theme(legend.position = 'none')
  

plotly1 <- ggplotly(p = norway2, tooltip = c("Country", "Total_Medals"))
plotly1
#api_create(plotly1, filename = "norway_pop-plotly")

7. Host Country Advantage

My second interactive plot (created with HighCharter) I modeled off of my host country heat map visualization. The user can hover over each heat map tile to see whether the country hosted the Olympics that particular year and how many medals the country won. Ideally, this visualization would hold even more information so the user could explore the Winter Olympics through hovering over tiles.

#library("viridis")
library(highcharter)
x <- c("Total Medals:  ", "Host: ", "Year: ")
y <- sprintf("{point.%s}", c("total_medals", "hosted", "Year"))
tltip <- tooltip_table(x, y)


hosts2 <- hosts %>% 
  filter(Country_Full %in% Host_Country) 
hosts2$hosted <- ifelse(hosts2$hosted == 1, "Yes", "No")

hosthc <- hchart(hosts2, "heatmap", hcaes(x = Year, y = Country_Full, value = total_medals)) %>% 
  hc_colorAxis(stops = color_stops(10, (palette(10))),
               type = "columnrange") %>% 
  hc_tooltip(useHTML = TRUE, pointFormat = tltip, headerFormat = '') %>% 
  hc_title(text = "Home Court Advantage") %>% 
  hc_subtitle(text = "Winter Olympics 1924 - 2014: Host Country Performance") %>% 
  hc_legend(layout = "vertical", verticalAlign = "top",
            align = "right", valueDecimals = 0) %>% 
  hc_size(height = 500, width = 600) %>% 
  hc_yAxis(
    title = list(text = "", gridLineWidth = 0.5)) %>% 
  hc_xAxis(
    title = list(text = "", gridLineWidth = 0.5))
hosthc

8. Data Table

I chose to create a data table that provides the reader with information about each contending country in the Winter Olympics, including the country’s population, how many winter games the country has competed and medaled in, and the number of Gold, Silver and Bronze medals won. The small colored bars are supposed to serve as a visual representation of the proportion of medals that were Gold, Silver and Bronze. The user is able to filter the columns to gather any information that might be useful or interesting about the country-level data available.

library(DT)
descriptive_pop <- descriptive_gdp %>% 
  select(Country_Full, Population, num_olymp, Gold, Silver, Bronze, total_medals) 
descriptive_pop$total_medals <- as.integer(descriptive_pop$total_medals)

## Borrowed the sketch Code from: https://rstudio.github.io/DT/
sketch = htmltools::withTags(table(
  class = 'display',
  thead(
    tr(
      th(rowspan = 2, 'Country'),
      th(colspan = 2, 'Olympics'),
      th(colspan = 4, 'Medals')
    ),
    tr(
      lapply(c('Population', 'Number of Games', 'Gold', 'Silver', 'Bronze', 'Total'), th)
    )
  )
))

## Data Table 
options(DT.options = list(pageLength = 10, autoWidth = TRUE, columnDefs = list(list(width = '80px', targets = c(1, 2, 3, 4, 5, 6)))))
desc_dt <- descriptive_pop %>%
    datatable(rownames = FALSE, filter = list(position = "top"),
              colnames = c("Country" = "Country_Full", "# Winter Olympics Medaled" = "num_olymp", 
                           "# Gold Medals" = "Gold", "# Silver Medals" = "Silver",
                           "# Bronze Medals" = "Bronze",
                           "# Total Medals" = "total_medals"),
              width = 700, height = 600,
              options = list(language = list(sSearch = "Filter:")),
              
              container = sketch,
              caption = htmltools::tags$caption(
                # style borrowed from same R Studio Github above
              style = 'caption-side: bottom; text-align: center;', 
              'Table 1: ', htmltools::em('Winter Olympics 1924 - 2014.' ))) %>% 
    formatStyle('Country', fontWeight = 'bold') %>% 
    formatStyle(columns = c(2,3,4,5,6,7), fontSize = '80%') %>% 
    formatStyle('# Total Medals', fontWeight = 'bold', backgroundColor = "lightblue") %>% 
    formatStyle('# Gold Medals',
      background = styleColorBar(range(descriptive_pop$total_medals), 'gold'),
      backgroundSize = '90% 80%',
      backgroundRepeat = 'no-repeat',
      backgroundPosition = 'center') %>% 
    formatStyle('# Silver Medals',
      background = styleColorBar(range(descriptive_pop$total_medals), 'silver'),
      backgroundSize = '90% 80%',
      backgroundRepeat = 'no-repeat',
      backgroundPosition = 'center') %>% 
    formatStyle('# Bronze Medals',
      background = styleColorBar(range(descriptive_pop$total_medals), 'brown'),
      backgroundSize = '90% 80%',
      backgroundRepeat = 'no-repeat',
      backgroundPosition = 'center')

desc_dt